cd "C:\Causal Analysis\data\" // the path should be changed by the user *Chapter 3 *1 clear all // clear all data and stored results from memory import delimited "JC.csv" // load JC data gen D = assignment // define treatment (assignment to JC) gen Y = earny4 // define outcome (earnings in fourth year) summarize Y if D == 1 // compute mean outcome for treated group scalar mean1 = r(mean) // store treated-group mean summarize Y if D == 0 // compute mean outcome for control group scalar mean0 = r(mean) // store control-group mean display mean1 - mean0 // compute the ATE *2 regress Y D, vce(hc3) // OLS regression with heteroscedasticity-robust se *3 bootstrap b_D = _b[D], reps(1999) seed(1): regress Y D // estimate coefficient on D using bootstrap with 1999 replications display _b[b_D] / _se[b_D] // compute the t-statistic display 2 * normal(-abs(_b[b_D] / _se[b_D])) // compute the p-value *4 clear all // clear all data and stored results from memory import delimited "wexpect.csv" // load wexpect data gen D1 = treatmentinformation // define first treatment (wage information) gen D2 = treatmentorder // define second treatment (order of questions) gen Y = wexpect2 // define outcome (wage expectations) regress Y D1 D2, vce(hc3) // OLS regression with heteroscedasticity-robust se *5 clear all // clear all data and stored results from memory import delimited "marketing.csv" // load marketing data gen D = newspaper // define treatment (newspaper advertising) gen Y = sales // define outcome (sales) npregress kernel Y D, /// // kernel regression estimator(constant) kernel(gaussian) noderivatives margins, at(D=(0(1)140)) /// // predict Y at D = 0–140 vce(bootstrap, reps(999) seed(1)) marginsplot, /// // plot regression function recast(line) recastci(rline) ciop(lp(dash)) /// yscale(range(12 26)) ylabel(12(2)26) /// xscale(range(0 140)) xlabel(0(20)140) /// title("") ytitle("Y") npregress kernel Y D, kernel(gaussian) meanbw(18.31489, copy) // kernel regression margins, dydx(D) at(D=(0(1)140)) reps(999) seed(1) // marginal effect of D on Y at D = 0–140 marginsplot, /// // plot effects recast(line) recastci(rline) ciop(lp(dash)) /// yscale(range(-0.2 0.2)) ylabel(-0.2(0.1)0.2) /// xscale(range(0 140)) xlabel(0(20)140) /// title("") ytitle("Gradient component 1 of Y") *6 clear all // clear all data and stored results from memory import delimited "coffeeleaflet.csv" // load coffeeleaflet data gen D = treatment // define treatment (leaflet) gen Y = awarewaste // define outcome (aware of waste production) regress Y D mumedu sex, vce(hc3) // OLS regression with heteroscedasticity-robust se *Chapter 4 *7 clear all // clear all data and stored results from memory import delimited "lalonde.csv" // load lalonde data gen D = treat // define treatment (training) gen Y = re78 // define outcome local X age educ nodegr married black hisp re74 re75 u74 u75 // covariates local DXdemeaned // initialize empty list for interactions foreach v of local X { // loop over covariates quietly summarize `v' // calculate mean of current covariate gen D_dm_`v' = D * (`v' - r(mean)) // interaction of D and demeaned X local DXdemeaned `DXdemeaned' D_dm_`v' // add interaction to list } reg Y D `X' `DXdemeaned', vce(hc3) // OLS regression with heteroscedasticity-robust se *8 clear all // clear all data and stored results from memory import delimited "lalonde.csv" // load lalonde data gen D = treat // define treatment gen Y = re78 // define outcome local X age educ nodegr married black hisp re74 re75 u74 u75 // covariates teffects nnmatch (Y `X') (D), atet nneighbor(1) metric(ivariance) vce(iid) // pair matching *9 local X age educ nodegr married black hisp re74 re75 u74 u75 // covariates teffects nnmatch (Y `X') (D), atet nneighbor(3) metric(ivariance) biasadj(`X') vce(iid) // 1:M matching *10 clear all // clear all data and stored results from memory import delimited "lalonde.csv" // load lalonde data gen D = treat // define treatment gen Y = re78 // define outcome local X age educ nodegr married black hisp re74 re75 u74 u75 // covariates logit D `X' // estimate propensity score by logit predict ps, pr // save fitted propensity score teffects nnmatch (Y ps) (D), atet biasadj(ps) metric(ivariance) vce(iid) // propensity score matching *11 clear all // clear all data and stored results from memory import delimited "lalonde.csv" // load lalonde data gen D = treat // define treatment gen Y = re78 // define outcome capture program drop boot_psmatch // drop program bs if it already exists program define boot_psmatch, rclass // define bootstrap program returning r() logit D age educ nodegr married black hisp re74 re75 u74 u75 // estimate propensity score by logit predict ps, pr // save fitted propensity score teffects nnmatch (Y ps) (D), atet biasadj(ps) metric(ivariance) vce(iid) // propensity score matching matrix b = e(b) // store estimated coefficients return scalar effect = b[1,1] // return estimated ATET drop ps // drop propensity score before next replication end // close program bs set seed 1 // set seed to 1 bootstrap effect=r(effect), reps(999): boot_psmatch // 999 bootstrap estimations *12 clear all // clear all data and stored results from memory import delimited "lbw.csv" // load lbw data gen D = smoke // define treatment (mother smoking) gen Y = bwt // outcome (birthweight in grams) gen white = race == 1 // create dummy variable: 1 if race is white, 0 otherwise local X white age lwt ptl ht ui ftv // covariates teffects ipw (Y) (D `X', probit), vce(bootstrap, reps(999) seed(1)) // IPW with 999 bootstraps *13 ssc install psweight, replace // install psweight package psweight cbpsoid D `X' // covariate balancing for ATE estimation regress Y D [pw=_weight], vce(robust) // weighted regression *14 clear all // clear all data and stored results from memory import delimited "lbw.csv" // load lbw data gen D = smoke // define treatment (mother smoking) gen Y = bwt // outcome (birthweight in grams) gen white = race == 1 // create dummy variable: 1 if race is white, 0 otherwise local X white age lwt ptl ht ui ftv // covariates net install st0290, from(http://www.stata-journal.com/software/sj13-1) // install drglm package drglm Y D, outcome(`X') exposure(`X') elink(logit) // DR reg *15 clear all // clear all data and stored results from memory import delimited "lbw.csv" // load lbw data gen D = smoke // define treatment (mother smoking) gen Y = bwt // outcome (birthweight in grams) gen white = race == 1 // create dummy variable: 1 if race is white, 0 otherwise local X white age lwt ptl ht ui ftv // covariates logit D `X' // estimate propensity score by logit predict ps, pr // save fitted propensity score kdensity ps if D == 1, name(kdens1, replace) // density of propensity score among treated kdensity ps if D == 0, name(kdens0, replace) // density of propensity score among non-treated histogram ps if D == 1, frequency width(0.1) start(0) name(hist1, replace) // plot histogram of p-score for treated histogram ps if D == 0, frequency width(0.1) start(0) name(hist0, replace) // plot histogram of p-score for non-treated graph combine kdens1 kdens0 hist1 hist0, rows(2) // combine four graphs in 2x2 layout summarize ps if D == 1 // summary statistics for p-scores among treated summarize ps if D == 0 // summary statistics p-scores among non-treated *16 ssc install psmatch2, replace // install psmatch2 package clear all // clear all data and stored results from memory import delimited "lbw.csv" // load lbw data gen D = smoke // define treatment (mother smoking) gen Y = bwt // outcome (birthweight in grams) gen white = race == 1 // create race dummy: 1 if race is white, 0 otherwise local X white age lwt ptl ht ui ftv // covariates psmatch2 D `X', outcome(Y) logit noreplacement descending // pair matching (ATET) on propensity score histogram _pscore if D == 1, /// // plot ps among treated fraction title("Raw Treated") name(rt, replace) histogram _pscore if D == 0, /// // plot ps among non-treated fraction title("Raw Control") name(rc, replace) histogram _pscore if D == 1 & _support == 1, /// // plot ps after matching among treated fraction title("Matched Treated") name(mt, replace) histogram _pscore [fw=_weight] if D == 0 & _weight < ., /// // plot ps after matching among non-treated fraction title("Matched Control") name(mc, replace) graph combine rt mt rc mc, col(2) // combine four graphs in 2x2 layout pstest `X', both // compare covariate balance before and after matching *17 logit D `X' // estimate propensity score by logit predict ps, pr // save fitted propensity score psmatch2 D, outcome(Y) pscore(ps) // pair matching (ATET) on propensity score pstest ptl, both // covariate balance before/after matching psmatch2 D, outcome(Y) pscore(ps) common // pair matching (ATET) pstest lwt, both // covariate balance before/after matching *18 clear all // clear all data and stored results from memory import delimited "lbw.csv" // load lbw data gen D = smoke // define treatment (mother smoking) gen Y = bwt // outcome (birthweight in grams) gen white = race == 1 // create race dummy: 1 if race is white, 0 otherwise capture program drop ipw_ptl_trim // drop program if it already exists program define ipw_ptl_trim, rclass // define bootstrap program tempvar ps w1 w0 // define temporary variables local X white age lwt ptl ht ui ftv // covariates probit D `X' // estimate propensity score by probit predict `ps', pr // predict p-score quietly summarize ptl [aw = D/`ps'] if inrange(`ps', 0.05, 0.95), meanonly // weighted mean for treated scalar mu1 = r(mean) // store treated mean quietly summarize ptl [aw = (1-D)/(1-`ps')] if inrange(`ps', 0.05, 0.95), meanonly // weighted mean for controls scalar mu0 = r(mean) // store control mean return scalar effect = mu1 - mu0 // return IPW mean difference end // close program set seed 1 // set seed to 1 bootstrap effect = r(effect), reps(999): ipw_ptl_trim // bootstrap IPW estimator *19 clear all // clear all data and stored results from memory import delimited "lbw.csv" // load lbw data gen D = smoke // define treatment (mother smoking) gen Y = bwt // outcome (birthweight in grams) gen white = race == 1 // create race dummy: 1 if race is white, 0 otherwise local X white age lwt ptl ht ui ftv // covariates probit D `X' // estimate propensity score by probit predict ps0, pr // predict p-score count if (ps0 < 0.1 | ps0 > 0.9) // count observations trimmed due to extreme p-scores capture program drop ipw_ptl_trim // drop program if it already exists program define ipw_ptl_trim, rclass // define bootstrap program tempvar ps w1 w0 // define temporary variables local X white age lwt ptl ht ui ftv // covariates probit D `X' // estimate propensity score by probit predict `ps', pr // predict p-score quietly summarize ptl [aw = D/`ps'] if inrange(`ps', 0.1, 0.9), meanonly // weighted mean for treated scalar mu1 = r(mean) // store treated mean quietly summarize ptl [aw = (1-D)/(1-`ps')] if inrange(`ps', 0.1, 0.9), meanonly // weighted mean for controls scalar mu0 = r(mean) // store control mean return scalar effect = mu1 - mu0 // return IPW mean difference end // close program set seed 1 // set seed to 1 bootstrap effect = r(effect), reps(999): ipw_ptl_trim // bootstrap IPW estimator *20 //no Stata code available *21 ssc install moremata, replace // install moremata package ssc install kdens, replace // install kdens package net install ivqte, from("https://raw.githubusercontent.com/bmelly/Stata/main/") // install ivqte package clear all // clear all data and stored results from memory import delimited "games.csv" // load games data egen miss = rowmiss(_all) // count missings in each row drop if miss > 0 // drop observations with missings drop miss // drop missing-count variable gen D = (metascore > 75) // define binary treatment gen Y = sales // define outcome gen action = (genre == "Action") // create dummy for the genre "Action" ivqte Y year i.action userscore (D), quantiles(0.05 0.10 0.15 0.20 0.25 0.30 0.35 0.40 0.45 0.50 0.55 0.60 0.65 0.70 0.75 0.80 0.85 0.90 0.95) variance // estimate QTE across different ranks matrix B = e(b) // store QTE estimates matrix V = e(V) // store variance matrix matrix Q = e(quantiles) // store quantiles preserve // keep current data in memory clear // clear data to create plot dataset local k = colsof(B) // number of quantiles set obs `k' // create one row per quantile gen tau = . // quantile variable gen qte = . // QTE estimate gen se = . // standard error forvalues i = 1/`k' { // extract quantiles, estimates and SEs replace tau = Q[1,`i'] in `i' replace qte = B[1,`i'] in `i' replace se = sqrt(V[`i',`i']) in `i' } gen ci_low = qte - 1.96*se // lower confidence interval gen ci_high = qte + 1.96*se // upper confidence interval twoway /// // plot QTEs with confidence band (connected qte tau, sort lcolor(black) mcolor(black) msize(vsmall)) /// (line ci_low tau, sort lcolor(black) lpattern(dash)) /// (line ci_high tau, sort lcolor(black) lpattern(dash)), /// title("") xtitle("tau") ytitle("QTE") legend(off) restore // restore original data *22 //no Stata code available *23 //no Stata code available *24 //no Stata code available *Chapter 5 *25 net install ddml, from(https://raw.githubusercontent.com/aahrens1/ddml/master) replace // install ddml package ssc install lassopack, replace // install lassopack package ssc install pdslasso, replace // install pdslasso package clear all // clear all data and stored results from memory import delimited "JC.csv" // load JC data gen D = trainy1 // define treatment (training) in first year gen Y = health48 // define outcome (health state after 4 years) set seed 1 // set seed to 1 ddml init interactive, kfolds(3) // initialize DML for binary treatment ddml E[Y|X,D]: cvlasso Y D female-alcoholmis, lopt // outcome model by cross-validated lasso ddml E[D|X]: cvlasso D female-alcoholmis, lopt // treatment model by cross-validated lasso ddml crossfit // cross-fit nuisance models ddml estimate, robust // estimate DML treatment effect *26 ssc install rforest, replace // install rforest package clear all // clear all data and stored results from memory import delimited "JC.csv" // load JC data gen D = trainy1 // define treatment (training) in first year gen Y = health48 // define outcome (health state after 4 years) ddml init interactive, kfolds(3) // initialize DML for binary treatment ddml E[Y|X,D], vtype(none): rforest Y D female-alcoholmis, type(reg) // random forest outcome model ddml E[D|X], vtype(none): rforest D female-alcoholmis, type(reg) // random forest treatment model ddml crossfit // cross-fit nuisance models ddml estimate, robust // estimate DML effect *27 clear all // clear all data and stored results from memory import delimited "JC.csv" // load JC data gen D = trainy1 // define treatment (training) in first year gen Y = pworky3 // outcome (proportion employed in third year) cate aipw (Y female-alcoholmis) (D), omethod(rforest) tmethod(rforest) rseed(1) // run causal forest *28 categraph histogram, frequency // distribution of CATEs *29 predict CATE // store estimated IATE/CATE predictions summarize CATE, detail // compute median of estimated CATEs local medCATE = r(p50) // store median CATE gen highCATE = CATE > `medCATE' // dummy for high CATE regress age highCATE, vce(hc3) // regress CATEs on age *30 clear all // clear all data and stored results from memory import delimited "JC.csv" // load JC data gen D = trainy1 // define treatment (training) in first year gen Y = pworky3 // outcome (proportion employed in third year) cate aipw (Y female-alcoholmis) (D), omethod(rforest) tmethod(rforest) rseed(1) // run causal forest estat projection female // regression of function on gender *31 predict CATE // store estimated IATE/CATE predictions rforest CATE female-alcoholmis, type(reg) // predict CATE as a function of X matrix list e(importance) // show predictive importance of X *32 //no Stata code available *Chapter 6 *33 clear all // clear all data and stored results from memory import delimited "JC.csv" // load JC data gen Z = assignment // define instrument (assignment to JC) gen D = trainy1 // define treatment (training in 1st year) gen Y = earny4 // define outcome (earnings in fourth year) summarize Y if Z == 1, meanonly // mean outcome among assigned scalar meanY1 = r(mean) // store mean outcome among assigned summarize Y if Z == 0, meanonly // mean outcome among non-assigned scalar meanY0 = r(mean) // store mean outcome among non-assigned scalar ITT = meanY1 - meanY0 // estimate intention-to-treat effect (ITT) summarize D if Z == 1, meanonly // mean treatment among assigned scalar meanD1 = r(mean) // store mean treatment among assigned summarize D if Z == 0, meanonly // mean treatment among non-assigned scalar meanD0 = r(mean) // store mean treatment among non-assigned scalar first = meanD1 - meanD0 // estimate first stage effect (complier share) scalar LATE = ITT / first // compute LATE display ITT // show ITT display first // show first stage display LATE // show LATE *34 ivregress 2sls Y (D = Z), vce(robust) // run two stage least squares regression *35 clear all // clear all data and stored results from memory import delimited "c401k.csv" // load 401(k) pension data gen D = p401k // treatment: participation in pension plan gen Z = e401k // instrument: eligibility for pension plan gen Y = nettfa // outcome: net financial assets in 1000 USD lateffects kappa (Y) (D) (Z inc-fsize), vce(bootstrap, reps(299)) // compute LATE (299 bootstraps) *36 set seed 1 // set seed to 1 ddml init interactiveiv, kfolds(2) // initialize DML LATE model ddml E[Y|X,Z]: pystacked Y inc-fsize Z, type(reg) /// // outcome model methods(ols lassocv ridgecv elasticcv rf gradboost) ddml E[D|X,Z]: pystacked D inc-fsize Z, type(class) /// // treatment model methods(logit lassocv ridgecv elasticcv rf gradboost) ddml E[Z|X]: pystacked Z inc-fsize, type(class) /// // instrument propensity methods(logit lassocv ridgecv elasticcv rf gradboost) ddml crossfit // cross-fit nuisance models ddml estimate, robust // estimate DML LATE *37 ssc install mtefe, replace // install mtefe package ssc install nearmrg, replace // install nearmrg package clear all // clear all data and stored results from memory import delimited "toydata.csv" // load toydata gen D = (d=="TRUE") // define binary treatment gen Z = z // define continuous instrument gen Y = y // define outcome gen X = x // define covariate mtefe Y X (D = Z), semiparametric gridpoints(100) bootreps(299) // LIV estimation of MTE mtefeplot, memory // plot MTE curve *Chapter 7 *38 clear all // clear all data from memory import delimited "kielmc.csv" // load kielmc data gen Y = rprice // define outcome gen D = nearinc // define treatment group gen T = y81 // define period dummy gen interact = D*T // treatment-period interaction regress Y D T interact, vce(cluster cbd) // DiD results with cluster st.error *39 //no Stata code available *40 ssc install csdid, replace // install csdid package ssc install drdid, replace // install drdid package clear all // clear all data and stored results from memory import delimited "mpdta.csv" // load mpdta data csdid lemp lpop, time(year) gvar(first_treat) ivar(countyreal) // doubly robust did csdid_plot, group(2004) // plot DiD results for the 2004 group csdid_plot, group(2006) // plot DiD results for the 2006 group csdid_plot, group(2007) // plot DiD results for the 2007 group estat group // group-specific aggregation *41 ssc install cic, replace // install cic package clear all // clear all data and stored results from memory import delimited "kielmc.csv" // load kielmc data gen T = (year == 1981) // post-treatment period dummy cic continuous rprice nearinc T, at(5(5)95) vce(bootstrap, reps(299)) // Changes-in-Changes QTETs matrix results = r(table)' // transpose result matrix clear // clear current data svmat results, names(col) // convert matrix to variables drop in 1 // drop first row, which contains mean effect gen q = 5*_n/100 // tau twoway /// // plot QTETs (line ll q, sort lpattern(dash) lcolor(black)) /// (line ul q, sort lpattern(dash) lcolor(black)) /// (connected b q, sort lcolor(black) mcolor(black) msize(vsmall)), /// xtitle("tau") ytitle("QTET") /// legend(off) /// graphregion(color(white)) plotregion(color(white)) *Chapter 8 *42 ssc install sdid, replace // install sdid package clear all // clear all data and stored results from memory import delimited "california_prop99.csv" // load smoking data set seed 1 // set seed to 1 sdid packspercapita state year treated, vce(placebo) // synthetic DiD *43 set seed 1 // set seed to 1 sdid packspercapita state year treated, method(sc) vce(placebo) graph // synthetic control with placebo SE and plot *Chapter 9 *44 ssc install rdrobust, replace // install rdrobust package clear all // clear all data and stored results from memory import delimited "rdrobust_RDsenate.csv", clear // data on elections for US Senate gen Y = vote // outcome is vote share of Democrats gen R = margin // running variable is margin of winning rdrobust Y R // sharp RDD rdplot Y R // plot outcome against running variable *45 ssc install rddensity // install rddensity package rddensity R // run McCrary-type density test *46 clear all // clear all data and stored results from memory import delimited "rcp.csv", clear // load rcp data gen Y = cn // outcome is expenditures on non-durables gen R = elig_year // running var based on eligibility to retire gen D = (retired=="TRUE") // treatment is retirement status rdrobust Y R, fuzzy(D) // fuzzy RDD *47 clear all // clear all data from memory import delimited "rkd.csv", clear // load rkd data gen y = pers_total // define outcome (total personnel) gen r = forcing // define running variable gen d = costequalgrants // define treatment (grants) rdrobust y r, fuzzy(d) deriv(1) p(1) // run fuzzy RKD *48 //no Stata code available *Chapter 10 *49 //no Stata code available *50 ssc install leebounds, replace // install leebounds package clear all // clear all data and stored results from memory import delimited "JC.csv" // load JC data gen treat = assignment // random treatment (assignment to JC) gen outcome = earny4 // define outcome (earnings in 4. year) gen selection = (pworky4 > 0) // sample selection: employed in 4. year replace outcome = . if selection == 0 // recode non-selected outcomes as NA leebounds outcome treat, select(selection) // bounds on ATE under monotonicity *51 ssc install psmatch2, replace // install psmatch2 package ssc install rbounds, replace // install rbounds package clear all // clear all data and stored results from memory import delimited "lalonde.csv" // load lalonde data gen D = treat // define treatment (training) gen Y = re78 // define outcome local X age educ nodegr married black hisp re74 re75 u74 u75 // covariates set seed 1 // set seed to 1 psmatch2 D `X',outcome(Y) noreplacement // pair matching (ATET),no replacement gen diff = Y - _Y if _treated == 1 & _support == 1 // reated-control outcome difference rbounds diff, gamma(1(0.25)2) // sensitivity analysis *Chapter 11 *52 //no Stata code available